/*******************************************************************************

This code file prepares the BBL table from the Property Address Directory (PAD)
for use in matching condo billing BBLs to tax-lot BBLs. This step is required
to identify which condo units are in the same building.

*******************************************************************************/

*** Manage settings

	* Project-wide settings. Presumably this defines the $data global used for
	* all file paths below -- confirm in settings.do.
	run "~/Dropbox (MIT)/Research/NYC421a/code/modules/settings.do"
		
*** Load BBL table of PAD

	* "clear" discards whatever dataset is currently in memory; without it the
	* import aborts with r(4) when unsaved data are present (e.g. on a re-run
	* within the same Stata session).
	import delimited "$data/raw/PAD/bobabbl.txt", encoding(ISO-8859-1) clear
	
	* Convert the top end of the condo BBL interval and the billing BBL
	* components to numeric
	destring hiboro hiblock hilot billboro billblock billlot, replace
	
*** Prepare crosswalk for condo BBLs to lowest BBL in tax lot

	* Create identifiers for non-overlapping sections of tax BBLs that need to be mapped to the same condo BBL:
	* records sharing a boro/block/lot are numbered 1, 2, ... within that group
	bys boro block lot: gen lot_section = _n
	
	* Create many-to-one mapping for range of condo tax BBLs to lowest tax BBL.
	* Keeps only the low/high range bounds and the billing BBL components.
	gcollapse (min) loboro loblock lolot (max) hiboro hiblock hilot (firstnm) billboro billblock billlot, by(boro block lot lot_section)
		
	* Drop records with no high-end borough (per the original author, a single observation)
	drop if missing(hiboro)
	
	* Drop records whose billing boro/block differs from the lot's own boro/block
	* (per the original author, two observations that appear erroneous)
	drop if (billblock != block & !missing(billblock)) | (billboro != boro & !missing(billboro))
	
	* Confirm that condo BBL codes do not cross boroughs or blocks, ...
	assert loboro == hiboro
	assert loblock == hiblock
	
	* ...Implying that lo/hi borough/block codes are redundant 
	drop hiboro hiblock loboro loblock

	* Confirm that both ends of the lot range are present and that hilot >= lolot.
	* The missing-value check is explicit because Stata treats missing as larger
	* than any number, so a test such as "diff > 0" would pass on missing bounds.
	assert !missing(lolot, hilot)
	assert hilot >= lolot

	* Number of individual lots covered by each range (used to expand the data below)
	gen diff = hilot - lolot + 1
		
*** Use range of lot codes to create one-to-one crosswalk

	* Replicate each range once per lot it covers: many-to-one --> one-to-one
	expand diff

	* Within each range, assign consecutive lot codes starting at the low end
	bys boro block lot lot_section: gen lot_ = lolot + _n - 1

	* Range bounds and width are no longer needed
	drop diff lolot hilot

	* "lot" becomes the individual lot code; the original lot is kept as "lot_new"
	rename (lot lot_) (lot_new lot)

	* In rare cases, there appears to be a data-entry issue where the same BBL is to be
	* assigned to multiple lowest BBLs. The lowest is used.
	* NOTE(review): collapse takes the minimum of lot_new and of each bill* variable
	* separately, so the retained values need not all come from the same source record.
	collapse (min) lot_new bill*, by(boro block lot)
	
*** Create proper BBL codes

	* Work with the components as strings so they can be zero-padded and concatenated
	tostring boro block lot_new lot billboro billblock billlot, replace

	* A "." in a billing component becomes an empty string, so it is treated
	* as missing in the padding step below
	foreach v of varlist billboro billblock billlot {
		replace `v' = "" if `v' == "."
	}

	* Left-pad block codes with zeros to 5 characters
	foreach v of varlist block billblock {
		replace `v' = substr("00000", 1, 5 - strlen(`v')) + `v' if strlen(`v') < 5 & !missing(`v')
	}

	* Left-pad lot codes with zeros to 4 characters
	foreach v of varlist lot lot_new billlot {
		replace `v' = substr("0000", 1, 4 - strlen(`v')) + `v' if strlen(`v') < 4 & !missing(`v')
	}

	* Concatenate boro + block + lot into full BBL codes:
	*   bbl       - the individual lot
	*   bbl_new   - the lot it is mapped to (lot_new)
	*   bbl_condo - the billing BBL (empty when all billing components are empty)
	gen bbl = boro+block+lot
	gen bbl_new = boro+block+lot_new
	gen bbl_condo = billboro+billblock+billlot

	* The separate billing components are no longer needed
	drop billboro billblock billlot

	* Back to numeric storage
	destring bbl bbl_new boro block lot lot_new bbl_condo, replace

	* Display full BBL codes without scientific notation and put identifiers first
	format bbl bbl_new bbl_condo %18.0f
	order bbl bbl_new boro block lot lot_new bbl_condo
	
*** Save crosswalk

	* Output has one observation per boro/block/lot with variables
	* bbl, bbl_new, boro, block, lot, lot_new and bbl_condo.
	* "replace" overwrites any existing file at this path.
	save "$data/clean/xwalk_condo_bbl.dta", replace
